GeoVisualization

# Load the combined state-level COVID time series, indexed by observation date.
import pandas

df = pandas.read_csv(
    "./data/shared/covid/covid_combined.csv",
    index_col="date",
    parse_dates=True,
)
df.head()
state fips cases deaths dtc100 population deaths100k
date
2020-01-21 Washington 53 1 0 0.0 7614893 0.0
2020-01-22 Washington 53 1 0 0.0 7614893 0.0
2020-01-23 Washington 53 1 0 0.0 7614893 0.0
2020-01-24 Washington 53 1 0 0.0 7614893 0.0
2020-01-25 Washington 53 1 0 0.0 7614893 0.0
# Cross-section of the panel on its final date — 54 rows, one per state/territory.
last_df = df.loc['2020-08-02']
last_df.shape
(54, 7)
# Read the 2010 Census state boundary layer (GeoJSON) as a GeoDataFrame.
import geopandas

states_path = "./data/shared/covid/gz_2010_us_040_00_500k.json"
gdf = geopandas.read_file(states_path)
gdf.head()
GEO_ID STATE NAME LSAD CENSUSAREA geometry
0 0400000US23 23 Maine 30842.923 MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ...
1 0400000US25 25 Massachusetts 7800.058 MULTIPOLYGON (((-70.83204 41.60650, -70.82373 ...
2 0400000US26 26 Michigan 56538.901 MULTIPOLYGON (((-88.68443 48.11579, -88.67563 ...
3 0400000US30 30 Montana 145545.801 POLYGON ((-104.05770 44.99743, -104.25015 44.9...
4 0400000US32 32 Nevada 109781.180 POLYGON ((-114.05060 37.00040, -114.04999 36.9...
gdf.columns.values  # raw column labels of the boundary layer
array(['GEO_ID', 'STATE', 'NAME', 'LSAD', 'CENSUSAREA', 'geometry'],
      dtype=object)
gdf.plot()  # quick sanity plot of the raw state geometries
<AxesSubplot:>

df.head()  # re-inspect the attribute table before joining
state fips cases deaths dtc100 population deaths100k
date
2020-01-21 Washington 53 1 0 0.0 7614893 0.0
2020-01-22 Washington 53 1 0 0.0 7614893 0.0
2020-01-23 Washington 53 1 0 0.0 7614893 0.0
2020-01-24 Washington 53 1 0 0.0 7614893 0.0
2020-01-25 Washington 53 1 0 0.0 7614893 0.0

Table Join

# Align the join key: the GeoJSON calls the state name column "NAME".
gdf = gdf.rename(columns={'NAME': 'state'})
gdf.head()
GEO_ID STATE state LSAD CENSUSAREA geometry
0 0400000US23 23 Maine 30842.923 MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ...
1 0400000US25 25 Massachusetts 7800.058 MULTIPOLYGON (((-70.83204 41.60650, -70.82373 ...
2 0400000US26 26 Michigan 56538.901 MULTIPOLYGON (((-88.68443 48.11579, -88.67563 ...
3 0400000US30 30 Montana 145545.801 POLYGON ((-104.05770 44.99743, -104.25015 44.9...
4 0400000US32 32 Nevada 109781.180 POLYGON ((-114.05060 37.00040, -114.04999 36.9...
# Promote the date index to a regular column so it survives the merge,
# then attach the time series to the geometries by state name.
df['date'] = df.index
join_gdf = gdf.merge(df, on='state')
join_gdf.head()
GEO_ID STATE state LSAD CENSUSAREA geometry fips cases deaths dtc100 population deaths100k date
0 0400000US23 23 Maine 30842.923 MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... 23 1 0 0.0 1344212 0.0 2020-03-12
1 0400000US23 23 Maine 30842.923 MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... 23 2 0 0.0 1344212 0.0 2020-03-13
2 0400000US23 23 Maine 30842.923 MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... 23 3 0 0.0 1344212 0.0 2020-03-14
3 0400000US23 23 Maine 30842.923 MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... 23 12 0 0.0 1344212 0.0 2020-03-15
4 0400000US23 23 Maine 30842.923 MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... 23 17 0 0.0 1344212 0.0 2020-03-16
# Cross-section for the final date; the inner join trimmed 54 areas to 52
# (areas with no matching boundary dropped out).
last_gdf = join_gdf.loc[join_gdf['date'] == '2020-08-02']
last_gdf.shape
(52, 13)
# Restrict the map to the conterminous US.
drop = ['Puerto Rico', 'Alaska', 'Hawaii']
keep_mask = ~last_gdf['state'].isin(drop)
last_gdf = last_gdf[keep_mask]
last_gdf.shape
(49, 13)
last_gdf.plot()  # geometries only, no attribute styling yet
<AxesSubplot:>

last_gdf.plot(column='dtc100')  # default choropleth: continuous color ramp
<AxesSubplot:>

last_gdf.plot(column='dtc100', legend=True, figsize=(16,9))  # add legend, enlarge figure
<AxesSubplot:>

# Classed choropleth: split dtc100 into 10 quantile bins instead of a ramp.
last_gdf.plot(column='dtc100', scheme='quantiles', k=10,
              legend=True, figsize=(16, 9))
<AxesSubplot:>

# Pin the legend to the lower-right so it does not cover the map.
last_gdf.plot(column='dtc100', scheme='quantiles', k=10,
              figsize=(16, 9), legend=True,
              legend_kwds=dict(loc='lower right'))
<AxesSubplot:>

# Final decile map: red sequential ramp, titled, axes hidden.
ax = last_gdf.plot(
    column='dtc100',
    scheme='quantiles',
    k=10,
    cmap='Reds',
    legend=True,
    legend_kwds={'loc': 'lower right'},
    figsize=(16, 9),
)
ax.set_title('COVID-19 Deaths per 100 Cases, 2020-08-02')
ax.set_axis_off()

Classifiers

Equal Interval

# Equal-width bins: each of the 5 classes spans the same dtc100 range.
ax = last_gdf.plot(
    column='dtc100',
    scheme='EqualInterval',
    k=5,
    cmap='Reds',
    legend=True,
    legend_kwds={'loc': 'lower right'},
    figsize=(16, 9),
)
ax.set_title('COVID-19 Deaths per 100 Cases, 2020-08-02')
ax.set_axis_off()

import mapclassify

# Classify the deaths-per-100-cases series into 5 equal-width bins.
y = last_gdf.dtc100
ea5 = mapclassify.EqualInterval(y, k=5)
ea5
EqualInterval

  Interval     Count
--------------------
[0.76, 2.39] |    25
(2.39, 4.02] |    13
(4.02, 5.64] |     4
(5.64, 7.27] |     3
(7.27, 8.90] |     4
4.02-2.39  # width of the second equal-interval class
1.6299999999999994
5.64-4.02  # width of the third class — equal (up to float rounding) by construction
1.62

Equal Count (Quantiles)

# Equal-count classification: roughly the same number of states per class.
eq5 = mapclassify.Quantiles(y, 5)
eq5
Quantiles

  Interval     Count
--------------------
[0.76, 1.49] |    10
(1.49, 1.84] |    10
(1.84, 2.93] |     9
(2.93, 4.23] |    10
(4.23, 8.90] |    10
# Quintile map of the same variable for comparison with equal intervals.
ax = last_gdf.plot(
    column='dtc100',
    scheme='Quantiles',
    k=5,
    cmap='Reds',
    legend=True,
    legend_kwds={'loc': 'lower right'},
    figsize=(16, 9),
)
ax.set_title('COVID-19 Deaths per 100 Cases, 2020-08-02')
ax.set_axis_off()

Maximum Breaks

# Maximum-breaks classification: class boundaries at the largest gaps
# between consecutive sorted values.
mb5 = mapclassify.MaximumBreaks(y, 5)
mb5
MaximumBreaks

  Interval     Count
--------------------
[0.76, 5.03] |    41
(5.03, 5.73] |     1
(5.73, 6.66] |     2
(6.66, 8.15] |     3
(8.15, 8.90] |     2
# Map using the maximum-breaks classes.
ax = last_gdf.plot(
    column='dtc100',
    scheme='MaximumBreaks',
    k=5,
    cmap='Reds',
    legend=True,
    legend_kwds={'loc': 'lower right'},
    figsize=(16, 9),
)
ax.set_title('COVID-19 Deaths per 100 Cases, 2020-08-02')
ax.set_axis_off()

Box Plot

mapclassify.BoxPlot(y)  # bins at Q1-1.5*IQR, Q1, median, Q3, Q3+1.5*IQR
BoxPlot

   Interval      Count
----------------------
( -inf, -1.82] |     0
(-1.82,  1.62] |    13
( 1.62,  2.37] |    12
( 2.37,  3.91] |    12
( 3.91,  7.35] |     9
( 7.35,  8.90] |     3
y.median()  # matches the upper bound of the third BoxPlot class above
2.3743977976600137
# Map using box-plot classes (no k: class count is fixed by the scheme).
ax = last_gdf.plot(
    column='dtc100',
    scheme='BoxPlot',
    cmap='Reds',
    legend=True,
    legend_kwds={'loc': 'lower right'},
    figsize=(16, 9),
)
ax.set_title('COVID-19 Deaths per 100 Cases, 2020-08-02')
ax.set_axis_off()

# IPython help — inspect BoxPlot's signature and docstring (note hinge=1.5).
mapclassify.BoxPlot?
Init signature: mapclassify.BoxPlot(y, hinge=1.5)
Docstring:     
BoxPlot Map Classification.
Parameters
----------
y : numpy.array
    Attribute to classify
hinge : float (default 1.5)
    Multiplier for *IQR*.
Attributes
----------
yb : numpy.array
    :math:`(n,1)`, bin ids for observations.
bins : array
    :math:`(n,1)`, the upper bounds of each class  (monotonic).
k : int
    The number of classes.
counts : numpy.array
    :math:`(k,1)`, the number of observations falling in each class.
low_outlier_ids : numpy.array
    Indices of observations that are low outliers.
high_outlier_ids : numpy.array
    Indices of observations that are high outliers.
Notes
-----
The bins are set as follows::
    bins[0] = q[0]-hinge*IQR
    bins[1] = q[0]
    bins[2] = q[1]
    bins[3] = q[2]
    bins[4] = q[2]+hinge*IQR
    bins[5] = inf  (see Notes)
where :math:`q` is an array of the first three quartiles of :math:`y` and
:math:`IQR=q[2]-q[0]`.
If :math:`q[2]+hinge*IQR > max(y)` there will only be 5 classes and no high
outliers, otherwise, there will be 6 classes and at least one high
outlier.
Examples
--------
>>> import mapclassify
>>> import numpy
>>> cal = mapclassify.load_example()
>>> bp = mapclassify.BoxPlot(cal)
>>> bp.bins
array([-5.287625e+01,  2.567500e+00,  9.365000e+00,  3.953000e+01,
        9.497375e+01,  4.111450e+03])
>>> list(bp.counts)
[0, 15, 14, 14, 6, 9]
>>> list(bp.high_outlier_ids)
[0, 6, 18, 29, 33, 36, 37, 40, 42]
>>> cal[bp.high_outlier_ids].values
array([ 329.92,  181.27,  370.5 ,  722.85,  192.05,  110.74, 4111.45,
        317.11,  264.93])
>>> bx = mapclassify.BoxPlot(numpy.arange(100))
>>> bx.bins
array([-49.5 ,  24.75,  49.5 ,  74.25, 148.5 ])
Init docstring:
Parameters
----------
y : numpy.array
    :math:`(n,1)`, attribute to classify
hinge : float (default 1.5)
    Multiple of inter-quartile range.
File:           /opt/tljh/user/lib/python3.9/site-packages/mapclassify/classifiers.py
Type:           type
Subclasses:     

Standard Deviation and Mean (StdMean)

mapclassify.StdMean(y)  # class breaks at the mean plus/minus multiples of the std dev
StdMean

   Interval      Count
----------------------
( -inf, -1.15] |     0
(-1.15,  0.97] |     3
( 0.97,  5.22] |    38
( 5.22,  7.34] |     5
( 7.34,  8.90] |     3
y.mean()- y.std()  # lower inner StdMean bound (0.97 in the table above)
0.9741967014843262
y.mean()+y.std()  # upper inner StdMean bound (5.22 in the table above)
5.217609367021282
# Map using standard-deviation-and-mean classes.
ax = last_gdf.plot(
    column='dtc100',
    scheme='StdMean',
    cmap='Reds',
    legend=True,
    legend_kwds={'loc': 'lower right'},
    figsize=(16, 9),
)
ax.set_title('COVID-19 Deaths per 100 Cases, 2020-08-02')
ax.set_axis_off()

Fisher-Jenks

mapclassify.FisherJenks(y, k=5)  # optimal breaks minimizing within-class deviation
FisherJenks

  Interval     Count
--------------------
[0.76, 2.12] |    24
(2.12, 3.34] |     9
(3.34, 5.29] |     9
(5.29, 7.29] |     4
(7.29, 8.90] |     3
# Map using Fisher-Jenks optimal classes.
ax = last_gdf.plot(
    column='dtc100',
    scheme='FisherJenks',
    k=5,
    cmap='Reds',
    legend=True,
    legend_kwds={'loc': 'lower right'},
    figsize=(16, 9),
)
ax.set_title('COVID-19 Deaths per 100 Cases, 2020-08-02')
ax.set_axis_off()

# Same classes, different aesthetics: blue ramp with black state outlines.
ax = last_gdf.plot(
    column='dtc100',
    scheme='FisherJenks',
    k=5,
    cmap='Blues',
    edgecolor='k',
    legend=True,
    legend_kwds={'loc': 'lower right'},
    figsize=(16, 9),
)
ax.set_title('COVID-19 Deaths per 100 Cases, 2020-08-02')
ax.set_axis_off()

# Compare goodness of fit (absolute deviation around class medians):
# a lower adcm means tighter, more homogeneous classes.
fj5 = mapclassify.FisherJenks(y, k=5)
q5 = mapclassify.Quantiles(y, k=5)
ea5 = mapclassify.EqualInterval(y, k=5)
fj5.adcm, q5.adcm, ea5.adcm
(15.68079626959761, 21.603825374669483, 18.970554166382374)

Color: Categorical

# Work in a projected (UTM) CRS so centroid coordinates are planar and
# the x/y medians are meaningful.
c_gdf = last_gdf.to_crs(last_gdf.estimate_utm_crs())
centroids = c_gdf.centroid

# BUG FIX: the original assigned 'east' = x < median and 'west' = x >= median,
# inverting the compass — in a projected CRS a LARGER x is FURTHER EAST.
# Also removed two unused throwaway variables (east, north).
c_gdf['east'] = centroids.x >= centroids.x.median()
c_gdf['west'] = centroids.x < centroids.x.median()
c_gdf['north'] = centroids.y >= centroids.y.median()
c_gdf['south'] = centroids.y < centroids.y.median()

# Quadrant indicators: products of booleans act as a logical AND.
c_gdf['NE'] = c_gdf.north * c_gdf.east
c_gdf['SE'] = c_gdf.south * c_gdf.east
c_gdf['NW'] = c_gdf.north * c_gdf.west
c_gdf['SW'] = c_gdf.south * c_gdf.west
    



# Collapse the four quadrant flags into one categorical code:
# 1 = NE, 2 = NW, 3 = SW, 4 = SE (each state is in exactly one quadrant).
c_gdf['region'] = c_gdf.NE + 2 * c_gdf.NW + 3 * c_gdf.SW + 4 * c_gdf.SE
c_gdf.groupby(by='region').count()
GEO_ID STATE state LSAD CENSUSAREA geometry fips cases deaths dtc100 ... date east west south north NE SE NW SW usgreedy
region
1 14 14 14 14 14 14 14 14 14 14 ... 14 14 14 14 14 14 14 14 14 14
2 11 11 11 11 11 11 11 11 11 11 ... 11 11 11 11 11 11 11 11 11 11
3 14 14 14 14 14 14 14 14 14 14 ... 14 14 14 14 14 14 14 14 14 14
4 10 10 10 10 10 10 10 10 10 10 ... 10 10 10 10 10 10 10 10 10 10

4 rows × 22 columns

c_gdf.plot(column='region', categorical=True)  # qualitative palette: region is nominal
<AxesSubplot:>

c_gdf.plot(column='region')  # without categorical=True the codes get a sequential ramp
<AxesSubplot:>


# Greedy graph coloring: assign colors so adjacent polygons differ.
c_gdf['usgreedy'] = mapclassify.greedy(c_gdf)
c_gdf.plot(column='usgreedy', categorical=True)
<AxesSubplot:>

Spatial Dynamics


join_gdf.head()  # long-form panel: one row per state per date
GEO_ID STATE state LSAD CENSUSAREA geometry fips cases deaths dtc100 population deaths100k date
0 0400000US23 23 Maine 30842.923 MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... 23 1 0 0.0 1344212 0.0 2020-03-12
1 0400000US23 23 Maine 30842.923 MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... 23 2 0 0.0 1344212 0.0 2020-03-13
2 0400000US23 23 Maine 30842.923 MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... 23 3 0 0.0 1344212 0.0 2020-03-14
3 0400000US23 23 Maine 30842.923 MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... 23 12 0 0.0 1344212 0.0 2020-03-15
4 0400000US23 23 Maine 30842.923 MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... 23 17 0 0.0 1344212 0.0 2020-03-16
# First day of each month, May through August 2020.
dates = [f"2020-{m:02d}-01" for m in range(5, 9)]

dates
['2020-05-01', '2020-06-01', '2020-07-01', '2020-08-01']
# Four monthly snapshots, mainland only.  The explicit .copy() makes last4 an
# independent frame: a later cell assigns a new 'pooled5' column, and writing
# into a boolean-indexed slice of join_gdf would trigger pandas'
# SettingWithCopyWarning.
last4 = join_gdf[join_gdf.date.isin(dates)]
last4 = last4[~last4['state'].isin(drop)].copy()
last4.shape
(196, 13)
# One quintile map per snapshot date (classes computed independently per day,
# so colors are NOT comparable across maps).
for snapshot in dates:
    daily = last4[last4.date == snapshot]
    daily.plot(column='dtc100', scheme='Quantiles', k=5,
               legend=True, legend_kwds={'loc': 'lower right'})

import mapclassify

# Pool all four dates into a single set of quantile breaks so that the
# same class means the same value range on every map.
q5 = mapclassify.Quantiles(last4.dtc100, k=5)

q5
Quantiles

  Interval     Count
--------------------
[0.76, 1.87] |    40
(1.87, 3.12] |    39
(3.12, 4.20] |    39
(4.20, 5.44] |    39
(5.44, 9.45] |    39
q5.bins  # upper bound of each pooled quantile class
array([1.86813187, 3.12420625, 4.20334682, 5.44027239, 9.45494994])
# Attach the pooled bin ids, then map the ids themselves with a fixed
# classification so each color means the same pooled class on every date.
last4['pooled5'] = q5.yb
for snapshot in dates:
    daily = last4[last4.date == snapshot]
    daily.plot(column='pooled5', scheme='equalinterval',
               legend=True, legend_kwds={'loc': 'lower right'})

Interactive Maps

last_gdf.columns  # inspect before trimming columns for the interactive map
Index(['GEO_ID', 'STATE', 'state', 'LSAD', 'CENSUSAREA', 'geometry', 'fips',
       'cases', 'deaths', 'dtc100', 'population', 'deaths100k', 'date'],
      dtype='object')
# The date is constant in this cross-section; drop it before the
# interactive map so the tooltip stays compact.
last_gdf = last_gdf.drop(columns='date')
last_gdf.explore()
Make this Notebook Trusted to load map: File -> Trust Notebook
last_gdf.explore(column='dtc100')  # interactive (folium) choropleth, continuous ramp
Make this Notebook Trusted to load map: File -> Trust Notebook
last_gdf.explore(column='dtc100', scheme='quantiles', k=5)  # classed interactive map
Make this Notebook Trusted to load map: File -> Trust Notebook
last_gdf.head()  # confirm the date column is gone
GEO_ID STATE state LSAD CENSUSAREA geometry fips cases deaths dtc100 population deaths100k
143 0400000US23 23 Maine 30842.923 MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ... 23 3958 123 3.107630 1344212 9.150342
327 0400000US25 25 Massachusetts 7800.058 MULTIPOLYGON (((-70.83204 41.60650, -70.82373 ... 25 118458 8638 7.292036 6949503 124.296658
473 0400000US26 26 Michigan 56538.901 MULTIPOLYGON (((-88.68443 48.11579, -88.67563 ... 26 91857 6460 7.032670 9986857 64.685016
616 0400000US30 30 Montana 145545.801 POLYGON ((-104.05770 44.99743, -104.25015 44.9... 30 4193 61 1.454806 1068778 5.707453
767 0400000US32 32 Nevada 109781.180 POLYGON ((-114.05060 37.00040, -114.04999 36.9... 32 50270 833 1.657052 3080156 27.044085
# Limit the hover tooltip to the rate, state name, and FIPS code.
last_gdf.explore(
    column='dtc100',
    scheme='quantiles',
    k=5,
    tooltip=['dtc100', 'state', 'fips'],
)
Make this Notebook Trusted to load map: File -> Trust Notebook
import folium

# FIX: the original called .centroid directly on lat/lon geometries, which
# geopandas warns about (planar centroids are inaccurate in a geographic CRS).
# Compute centroids in a projected UTM CRS, then project the points back to
# the layer's geographic CRS for display.
centroids = (
    last_gdf.to_crs(last_gdf.estimate_utm_crs())
    .centroid
    .to_crs(last_gdf.crs)
)
/tmp/ipykernel_3348826/2951951418.py:1: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.

  centroids = last_gdf.centroid
# Layer the centroid points on top of the choropleth.
m = last_gdf.explore(
    column='dtc100',
    scheme='quantiles',
    k=5,
    tooltip=['dtc100', 'state', 'fips'],
)
centroids.explore(m=m)
m
Make this Notebook Trusted to load map: File -> Trust Notebook
# Name both layers so the LayerControl widget can toggle them.
m = last_gdf.explore(
    column='dtc100',
    scheme='quantiles',
    k=5,
    tooltip=['dtc100', 'state', 'fips'],
    name='polygon',
)
centroids.explore(m=m, name='centroid')
folium.LayerControl().add_to(m)
m
Make this Notebook Trusted to load map: File -> Trust Notebook
# Same layered map on the Stamen Toner basemap.
# NOTE(review): Stamen tile hosting moved to Stadia Maps and now requires an
# API key / new URL — confirm this tile name still resolves.
m = last_gdf.explore(column='dtc100', scheme='quantiles', k=5,
                tooltip=['dtc100', 'state', 'fips'],
                    name='polygon', tiles='Stamen Toner')
centroids.explore(m=m, name='centroid')
folium.LayerControl().add_to(m)
m
Make this Notebook Trusted to load map: File -> Trust Notebook
# Same layered map on the Stamen Terrain basemap.
# NOTE(review): as with Toner, Stamen tiles now route through Stadia Maps —
# verify this tile provider still works without a key.
m = last_gdf.explore(column='dtc100', scheme='quantiles', k=5,
                tooltip=['dtc100', 'state', 'fips'],
                    name='polygon', tiles='Stamen Terrain')
centroids.explore(m=m, name='centroid')
folium.LayerControl().add_to(m)
m
Make this Notebook Trusted to load map: File -> Trust Notebook
# Same layered map on the muted CartoDB positron basemap.
m = last_gdf.explore(
    column='dtc100',
    scheme='quantiles',
    k=5,
    tooltip=['dtc100', 'state', 'fips'],
    name='polygon',
    tiles='CartoDB positron',
)
centroids.explore(m=m, name='centroid')
folium.LayerControl().add_to(m)
m
Make this Notebook Trusted to load map: File -> Trust Notebook
m.crs  # folium renders in Web Mercator (EPSG:3857)
'EPSG3857'
# Rebuild the layered map and drop a point marker on SDSU.
m = last_gdf.explore(
    column='dtc100',
    scheme='quantiles',
    k=5,
    tooltip=['dtc100', 'state', 'fips'],
    name='polygon',
    tiles='CartoDB positron',
)
centroids.explore(m=m, name='centroid')
sdsu_marker = folium.Marker(
    [32.7774, -117.0714],
    popup='SDSU',
    icon=folium.Icon(color='red', icon='ok-sign'),
)
sdsu_marker.add_to(m)
folium.LayerControl().add_to(m)
m
Make this Notebook Trusted to load map: File -> Trust Notebook